Read in, format, and visualize data.

# Read in target data

target_data <- readr::read_csv("https://data.ecoforecast.org/targets/terrestrial/terrestrial_30min-targets.csv.gz", guess_max = 1e6)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   time = col_datetime(format = ""),
##   siteID = col_character(),
##   nee = col_double(),
##   le = col_double(),
##   nee_sd_intercept = col_double(),
##   nee_sd_slopeP = col_double(),
##   nee_sd_slopeN = col_double(),
##   le_sd_intercept = col_double(),
##   le_sd_slopeP = col_double(),
##   le_sd_slopeN = col_double(),
##   vswc = col_double(),
##   vswc_sd = col_double()
## )
# Read in gap-filled temperature data

temp_data <- read.csv(text = getURL("https://raw.githubusercontent.com/eco4cast-class-VT/VT_NEET/master/temp_gapfill.csv"))

# Convert time column from character to POSIXct

# Time zone in target data is UTC according to https://docs.google.com/document/d/1l7sxBk-z-GHTlk50rdxP0lPTwJzFJ2gykclINkMsWcc/edit

target_data$time <- ymd_hms(target_data$time, tz = "UTC")
temp_data$startDateTime <- ymd_hms(temp_data$startDateTime, tz = "UTC")

# Convert siteID column from character to factor

target_data$siteID <- as.factor(target_data$siteID)
temp_data$siteID <- as.factor(temp_data$siteID)

# Plot all data

ggplot(data = target_data) +
  geom_point(aes(x = time, y = nee, color = siteID)) +
  facet_grid(siteID ~ .) + 
  labs(title = "Full Time Period: 01 Feb 2017 - 31 Jan 2021",
       x = "Time (UTC)",
       y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
  theme_bw() +
  theme(legend.position = "none")

# Plot data before forecast time period

ggplot(data = filter(target_data,
                     time >= "2021-02-01",
                     time < "2021-03-01")) +
  geom_point(aes(x = time, y = nee, color = siteID)) +
  facet_grid(siteID ~ .) + 
  labs(title = "Example Time Period: 01 Feb 2021 - 01 Mar 2021",
       x = "Time (UTC)",
       y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
  theme_bw() +
  theme(legend.position = "none")

Construct temperature dynamic linear model (DLM) for NIMBLE.

TempDLM <- nimbleCode({
  
  # Note:
  # x = real NEE (state variable)
  # y = observed NEE
  
  # Priors
  
  x[1] ~ dnorm(x_ic, sd = sd_ic)
  sd_add ~ dunif(0, 100)
  b1 ~ dnorm(0, sd = 100)  # Temperature effect coefficient
  
  # Process model
  
  for(t in 2:n){
    pred[t] <- x[t-1] + b1 * temp[t]
    x[t] ~ dnorm(pred[t], sd = sd_add)
  }
  
  # Data model
  
  for(t in 1:n){
    y[t] ~ dnorm(x[t], sd = sd_obs[t])
  }
  
})

Because of the size of the data set, a subset of the data had to be used for model fitting. For forecasting, the data subset consisted of data from the time period immediately before the forecast period. The variables controlling the size of the data subset are specified below.

# Note: B/c of size of data set, need to use subset of data

subset_length_days <- 21  # Length of subset [d]

subset_length_timesteps <- subset_length_days * 24 * 2  # Number of time steps in subset (days x hours/day x half hours/hour)

Specify data from BART to be used for model fitting.

# Separate data by site

target_data_BART <- filter(target_data, siteID == "BART")
temp_data_BART <- filter(temp_data, siteID == "BART")

# Specify time period end date and time

end_date_time_BART <- ymd_hms("2021-02-28 23:30:00", tz = "UTC")

# Match time period end date and time to index value

end_i_BART <- match(end_date_time_BART, target_data_BART$time)

# Calculate time period start data and time index value

start_i_BART <- end_i_BART - subset_length_timesteps + 1

# Subset data

target_data_BART_sub <- target_data_BART[start_i_BART:end_i_BART,]

# Interpolate NEE values to estimate sd_obs

nee_interp_BART_sub <- approx(x = target_data_BART_sub$time[!is.na(target_data_BART_sub$nee)],
                              y = target_data_BART_sub$nee[!is.na(target_data_BART_sub$nee)],
                              xout = target_data_BART_sub$time,
                              method = "linear",
                              yleft = mean(target_data_BART_sub$nee, na.rm = TRUE),
                              yright = mean(target_data_BART_sub$nee, na.rm = TRUE),
                              na.rm = TRUE)

sd_obs_BART_sub <- rep(NA, length(target_data_BART_sub$nee))  # Initialize sd_obs vector

for(i in 1:length(sd_obs_BART_sub)){
  
  if(nee_interp_BART_sub$y[i] >= 0){  # Calculate sd_obs if NEE is positive
    
    sd_obs_BART_sub[i] <- target_data_BART_sub$nee_sd_intercept[1] + target_data_BART_sub$ nee_sd_slopeP[1] * nee_interp_BART_sub$y[i]  # Note: Since intercept and slope do not change, can just use first value of intercept and slope values
    
  }else{  # Calculate sd_obs if NEE is negative
    
    sd_obs_BART_sub[i] <- target_data_BART_sub$nee_sd_intercept[1] + target_data_BART_sub$ nee_sd_slopeN[1] * nee_interp_BART_sub$y[i]
    
  }
  
}

# Match temperature data to subset NEE data

BART_temp_i <- match(target_data_BART_sub$time, temp_data_BART$startDateTime)
temp_data_BART_sub <- temp_data_BART[BART_temp_i,]

# Specify sd_obs modifier (to account for lower NEE in winter)

sd_obs_mod_BART <- 0.125

# Specify data

data_TempDLM_BART <- list(y = target_data_BART_sub$nee,
                          temp = temp_data_BART_sub$temp,
                          sd_obs = sd_obs_BART_sub * sd_obs_mod_BART)

Specify data from KONZ to be used for model fitting.

# Separate data by site

target_data_KONZ <- filter(target_data, siteID == "KONZ")
temp_data_KONZ <- filter(temp_data, siteID == "KONZ")

# Specify time period end date and time

end_date_time_KONZ <- ymd_hms("2021-02-28 23:30:00", tz = "UTC")

# Match time period end date and time to index value

end_i_KONZ <- match(end_date_time_KONZ, target_data_KONZ$time)

# Calculate time period start data and time index value

start_i_KONZ <- end_i_KONZ - subset_length_timesteps + 1

# Subset data

target_data_KONZ_sub <- target_data_KONZ[start_i_KONZ:end_i_KONZ,]

# Interpolate NEE values to estimate sd_obs

nee_interp_KONZ_sub <- approx(x = target_data_KONZ_sub$time[!is.na(target_data_KONZ_sub$nee)],
                              y = target_data_KONZ_sub$nee[!is.na(target_data_KONZ_sub$nee)],
                              xout = target_data_KONZ_sub$time,
                              method = "linear",
                              yleft = mean(target_data_KONZ_sub$nee, na.rm = TRUE),
                              yright = mean(target_data_KONZ_sub$nee, na.rm = TRUE),
                              na.rm = TRUE)

sd_obs_KONZ_sub <- rep(NA, length(target_data_KONZ_sub$nee))  # Initialize sd_obs vector

for(i in 1:length(sd_obs_KONZ_sub)){
  
  if(nee_interp_KONZ_sub$y[i] >= 0){  # Calculate sd_obs if NEE is positive
    
    sd_obs_KONZ_sub[i] <- target_data_KONZ_sub$nee_sd_intercept[1] + target_data_KONZ_sub$ nee_sd_slopeP[1] * nee_interp_KONZ_sub$y[i]  # Note: Since intercept and slope do not change, can just use first value of intercept and slope values
    
  }else{  # Calculate sd_obs if NEE is negative
    
    sd_obs_KONZ_sub[i] <- target_data_KONZ_sub$nee_sd_intercept[1] + target_data_KONZ_sub$ nee_sd_slopeN[1] * nee_interp_KONZ_sub$y[i]
    
  }
  
}

# Match temperature data to subset NEE data

KONZ_temp_i <- match(target_data_KONZ_sub$time, temp_data_KONZ$startDateTime)
temp_data_KONZ_sub <- temp_data_KONZ[KONZ_temp_i,]

# Specify sd_obs modifier (to account for lower NEE in winter)

sd_obs_mod_KONZ <- 0.075

# Specify data

data_TempDLM_KONZ <- list(y = target_data_KONZ_sub$nee,
                          temp = temp_data_KONZ_sub$temp,
                          sd_obs = sd_obs_KONZ_sub * sd_obs_mod_KONZ)

Specify data from OSBS to be used for model fitting.

# Separate data by site

target_data_OSBS <- filter(target_data, siteID == "OSBS")
temp_data_OSBS <- filter(temp_data, siteID == "OSBS")

# Specify time period end date and time

end_date_time_OSBS <- ymd_hms("2021-02-28 23:30:00", tz = "UTC")

# Match time period end date and time to index value

end_i_OSBS <- match(end_date_time_OSBS, target_data_OSBS$time)

# Calculate time period start data and time index value

start_i_OSBS <- end_i_OSBS - subset_length_timesteps + 1

# Subset data

target_data_OSBS_sub <- target_data_OSBS[start_i_OSBS:end_i_OSBS,]

# Interpolate NEE values to estimate sd_obs

nee_interp_OSBS_sub <- approx(x = target_data_OSBS_sub$time[!is.na(target_data_OSBS_sub$nee)],
                              y = target_data_OSBS_sub$nee[!is.na(target_data_OSBS_sub$nee)],
                              xout = target_data_OSBS_sub$time,
                              method = "linear",
                              yleft = mean(target_data_OSBS_sub$nee, na.rm = TRUE),
                              yright = mean(target_data_OSBS_sub$nee, na.rm = TRUE),
                              na.rm = TRUE)

sd_obs_OSBS_sub <- rep(NA, length(target_data_OSBS_sub$nee))  # Initialize sd_obs vector

for(i in 1:length(sd_obs_OSBS_sub)){
  
  if(nee_interp_OSBS_sub$y[i] >= 0){  # Calculate sd_obs if NEE is positive
    
    sd_obs_OSBS_sub[i] <- target_data_OSBS_sub$nee_sd_intercept[1] + target_data_OSBS_sub$ nee_sd_slopeP[1] * nee_interp_OSBS_sub$y[i]  # Note: Since intercept and slope do not change, can just use first value of intercept and slope values
    
  }else{  # Calculate sd_obs if NEE is negative
    
    sd_obs_OSBS_sub[i] <- target_data_OSBS_sub$nee_sd_intercept[1] + target_data_OSBS_sub$ nee_sd_slopeN[1] * nee_interp_OSBS_sub$y[i]
    
  }
  
}

# Match temperature data to subset NEE data

OSBS_temp_i <- match(target_data_OSBS_sub$time, temp_data_OSBS$startDateTime)
temp_data_OSBS_sub <- temp_data_OSBS[OSBS_temp_i,]

# Specify sd_obs modifier (generally not needed for OSBS b/c of relatively high NEE in winter)

sd_obs_mod_OSBS <- 1

# Specify data

data_TempDLM_OSBS <- list(y = target_data_OSBS_sub$nee,
                          temp = temp_data_OSBS_sub$temp,
                          sd_obs = sd_obs_OSBS_sub * sd_obs_mod_OSBS)

Specify data from SRER to be used for model fitting.

# Separate data by site

target_data_SRER <- filter(target_data, siteID == "SRER")
temp_data_SRER <- filter(temp_data, siteID == "SRER")

# Specify time period end date and time

end_date_time_SRER <- ymd_hms("2021-02-28 23:30:00", tz = "UTC")

# Match time period end date and time to index value

end_i_SRER <- match(end_date_time_SRER, target_data_SRER$time)

# Calculate time period start data and time index value

start_i_SRER <- end_i_SRER - subset_length_timesteps + 1

# Subset data

target_data_SRER_sub <- target_data_SRER[start_i_SRER:end_i_SRER,]

# Interpolate NEE values to estimate sd_obs

nee_interp_SRER_sub <- approx(x = target_data_SRER_sub$time[!is.na(target_data_SRER_sub$nee)],
                              y = target_data_SRER_sub$nee[!is.na(target_data_SRER_sub$nee)],
                              xout = target_data_SRER_sub$time,
                              method = "linear",
                              yleft = mean(target_data_SRER_sub$nee, na.rm = TRUE),
                              yright = mean(target_data_SRER_sub$nee, na.rm = TRUE),
                              na.rm = TRUE)

sd_obs_SRER_sub <- rep(NA, length(target_data_SRER_sub$nee))  # Initialize sd_obs vector

for(i in 1:length(sd_obs_SRER_sub)){
  
  if(nee_interp_SRER_sub$y[i] >= 0){  # Calculate sd_obs if NEE is positive
    
    sd_obs_SRER_sub[i] <- target_data_SRER_sub$nee_sd_intercept[1] + target_data_SRER_sub$ nee_sd_slopeP[1] * nee_interp_SRER_sub$y[i]  # Note: Since intercept and slope do not change, can just use first value of intercept and slope values
    
  }else{  # Calculate sd_obs if NEE is negative
    
    sd_obs_SRER_sub[i] <- target_data_SRER_sub$nee_sd_intercept[1] + target_data_SRER_sub$ nee_sd_slopeN[1] * nee_interp_SRER_sub$y[i]
    
  }
  
}

# Match temperature data to subset NEE data

SRER_temp_i <- match(target_data_SRER_sub$time, temp_data_SRER$startDateTime)
temp_data_SRER_sub <- temp_data_SRER[SRER_temp_i,]

# Specify sd_obs modifier (to account for lower NEE in winter)

sd_obs_mod_SRER <- 0.15

# Specify data

data_TempDLM_SRER <- list(y = target_data_SRER_sub$nee,
                          temp = temp_data_SRER_sub$temp,
                          sd_obs = sd_obs_SRER_sub * sd_obs_mod_SRER)

Specify constants for NIMBLE.

# Specify constants

constants_BART <- list(n = length(target_data_BART_sub$nee),
                       x_ic = 0,
                       sd_ic = 1)

constants_KONZ <- list(n = length(target_data_KONZ_sub$nee),
                       x_ic = 0,
                       sd_ic = 1)

constants_OSBS <- list(n = length(target_data_OSBS_sub$nee),
                       x_ic = 0,
                       sd_ic = 1)

constants_SRER <- list(n = length(target_data_SRER_sub$nee),
                       x_ic = 0,
                       sd_ic = 1)

Specify initial conditions for temperature DLM.

# Specify number of chains

nchains = 3

# Initialize initial condition lists

inits_TempDLM_BART <- list()
inits_TempDLM_KONZ <- list()
inits_TempDLM_OSBS <- list()
inits_TempDLM_SRER <- list()

# Generate initial conditions

for(i in 1:nchains){
  
  # BART
  
  y.samp <- sample(nee_interp_BART_sub$y, length(nee_interp_BART_sub$y), replace = TRUE)
  inits_TempDLM_BART[[i]] <- list(sd_add = sd(diff(y.samp), na.rm = TRUE),
                                  x = nee_interp_BART_sub$y,
                                  b1 = rnorm(1, mean = 0, sd = 1))
  
  # KONZ
  
  y.samp <- sample(nee_interp_KONZ_sub$y, length(nee_interp_KONZ_sub$y), replace = TRUE)
  inits_TempDLM_KONZ[[i]] <- list(sd_add = sd(diff(y.samp), na.rm = TRUE),
                                  x = nee_interp_KONZ_sub$y,
                                  b1 = rnorm(1, mean = 0, sd = 1))
  
  # OSBS
  
  y.samp <- sample(nee_interp_OSBS_sub$y, length(nee_interp_OSBS_sub$y), replace = TRUE)
  inits_TempDLM_OSBS[[i]] <- list(sd_add = sd(diff(y.samp), na.rm = TRUE),
                                  x = nee_interp_OSBS_sub$y,
                                  b1 = rnorm(1, mean = 0, sd = 1))
  
  # SRER
  
  y.samp <- sample(nee_interp_SRER_sub$y, length(nee_interp_SRER_sub$y), replace = TRUE)
  inits_TempDLM_SRER[[i]] <- list(sd_add = sd(diff(y.samp), na.rm = TRUE),
                                  x = nee_interp_SRER_sub$y,
                                  b1 = rnorm(1, mean = 0, sd = 1))
  
}

Run NIMBLE for temperature DLM.

# Preliminaries

niter <- 11000
nthin <- 1

# BART

nimble_out_TempDLM_BART <- nimbleMCMC(code = TempDLM,
                                      data = data_TempDLM_BART,
                                      inits = inits_TempDLM_BART,
                                      constants = constants_BART,
                                      monitors = c("b1",
                                                   "sd_add",
                                                   "x",
                                                   "y"),
                                      niter = niter,
                                      nchains = nchains,
                                      thin = nthin,
                                      samplesAsCodaMCMC = TRUE)
## NAs were detected in model variables: y, logProb_y.
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
# KONZ

nimble_out_TempDLM_KONZ <- nimbleMCMC(code = TempDLM,
                                      data = data_TempDLM_KONZ,
                                      inits = inits_TempDLM_KONZ,
                                      constants = constants_KONZ,
                                      monitors = c("b1",
                                                   "sd_add",
                                                   "x",
                                                   "y"),
                                      niter = niter,
                                      nchains = nchains,
                                      thin = nthin,
                                      samplesAsCodaMCMC = TRUE)
## NAs were detected in model variables: y, logProb_y.
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
# OSBS

nimble_out_TempDLM_OSBS <- nimbleMCMC(code = TempDLM,
                                      data = data_TempDLM_OSBS,
                                      inits = inits_TempDLM_OSBS,
                                      constants = constants_OSBS,
                                      monitors = c("b1",
                                                   "sd_add",
                                                   "x",
                                                   "y"),
                                      niter = niter,
                                      nchains = nchains,
                                      thin = nthin,
                                      samplesAsCodaMCMC = TRUE)
## NAs were detected in model variables: y, logProb_y.
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
# SRER

nimble_out_TempDLM_SRER <- nimbleMCMC(code = TempDLM,
                                      data = data_TempDLM_SRER,
                                      inits = inits_TempDLM_SRER,
                                      constants = constants_SRER,
                                      monitors = c("b1",
                                                   "sd_add",
                                                   "x",
                                                   "y"),
                                      niter = niter,
                                      nchains = nchains,
                                      thin = nthin,
                                      samplesAsCodaMCMC = TRUE)
## NAs were detected in model variables: y, logProb_y.
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|

Visualize chains and remove burn-in for temperature DLM.

# BART

plot(nimble_out_TempDLM_BART[, c("b1")], main = "BART: b1")  # Visualize all chains

plot(nimble_out_TempDLM_BART[, c("sd_add")], main = "BART: sd_add")  # Visualize all chains

burnin_TempDLM_BART <- 2000  # Burn-in
nimble_burn_TempDLM_BART <- window(nimble_out_TempDLM_BART, start = burnin_TempDLM_BART)  # Exclude burn-in
plot(nimble_burn_TempDLM_BART[, c("b1")], main = "BART: b1 (burn-in removed)")  # Visualize chains w/o burn-in

plot(nimble_burn_TempDLM_BART[, c("sd_add")], main = "BART: sd_add (burn-in removed)")  # Visualize chains w/o burn-in

# KONZ

plot(nimble_out_TempDLM_KONZ[, c("b1")], main = "KONZ: b1")  # Visualize all chains

plot(nimble_out_TempDLM_KONZ[, c("sd_add")], main = "KONZ: sd_add")  # Visualize all chains

burnin_TempDLM_KONZ <- 2000  # Burn-in
nimble_burn_TempDLM_KONZ <- window(nimble_out_TempDLM_KONZ, start = burnin_TempDLM_KONZ)  # Exclude burn-in
plot(nimble_burn_TempDLM_KONZ[, c("b1")], main = "KONZ: b1 (burn-in removed)")  # Visualize chains w/o burn-in

plot(nimble_burn_TempDLM_KONZ[, c("sd_add")], main = "KONZ: sd_add (burn-in removed)")  # Visualize chains w/o burn-in

# OSBS

plot(nimble_out_TempDLM_OSBS[, c("b1")], main = "OSBS: b1")  # Visualize all chains

plot(nimble_out_TempDLM_OSBS[, c("sd_add")], main = "OSBS: sd_add")  # Visualize all chains

burnin_TempDLM_OSBS <- 2000  # Burn-in
nimble_burn_TempDLM_OSBS <- window(nimble_out_TempDLM_OSBS, start = burnin_TempDLM_OSBS)  # Exclude burn-in
plot(nimble_burn_TempDLM_OSBS[, c("b1")], main = "OSBS: b1 (burn-in removed)")  # Visualize chains w/o burn-in

plot(nimble_burn_TempDLM_OSBS[, c("sd_add")], main = "OSBS: sd_add (burn-in removed)")  # Visualize chains w/o burn-in

# SRER

plot(nimble_out_TempDLM_SRER[, c("b1")], main = "SRER: b1")  # Visualize all chains

plot(nimble_out_TempDLM_SRER[, c("sd_add")], main = "SRER: sd_add")  # Visualize all chains

burnin_TempDLM_SRER <- 2000  # Burn-in
nimble_burn_TempDLM_SRER <- window(nimble_out_TempDLM_SRER, start = burnin_TempDLM_SRER)  # Exclude burn-in
plot(nimble_burn_TempDLM_SRER[, c("b1")], main = "SRER: b1 (burn-in removed)")  # Visualize chains w/o burn-in

plot(nimble_burn_TempDLM_SRER[, c("sd_add")], main = "SRER: sd_add (burn-in removed)")  # Visualize chains w/o burn-in

Examine Brooks-Gelman-Rubin convergence diagnostic for temperature DLM.

Since the upper confidence interval (C.I.) for the Brooks-Gelman-Rubin convergence diagnostic is less than 1.1 for both parameters, we can conclude the chains have converged for BART.

# BART

gelman.diag(nimble_burn_TempDLM_BART[, "b1"])
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]          1       1.01
gelman.diag(nimble_burn_TempDLM_BART[, "sd_add"])
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]       1.02       1.07

Since at least one of the upper confidence intervals for the Brooks-Gelman-Rubin convergence diagnostic is greater than 1.1, we can conclude the chains have not converged for KONZ. This may be due to the large gaps in the data for the fitting period.

# KONZ

gelman.diag(nimble_burn_TempDLM_KONZ[, "b1"])
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]       1.04       1.14
gelman.diag(nimble_burn_TempDLM_KONZ[, "sd_add"])
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]       1.17       1.49

Since the upper confidence interval (C.I.) for the Brooks-Gelman-Rubin convergence diagnostic is less than 1.1 for both parameters, we can conclude the chains have converged for OSBS.

# OSBS

gelman.diag(nimble_burn_TempDLM_OSBS[, "b1"])
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]          1       1.01
gelman.diag(nimble_burn_TempDLM_OSBS[, "sd_add"])
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]       1.03       1.09

Since at least one of the upper confidence intervals for the Brooks-Gelman-Rubin convergence diagnostic is greater than 1.1, we can conclude the chains have not converged for SRER. This may be due to the large gaps in the data for the fitting period.

# SRER

gelman.diag(nimble_burn_TempDLM_SRER[, "b1"])
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]          1       1.01
gelman.diag(nimble_burn_TempDLM_SRER[, "sd_add"])
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]       1.09       1.29

Convert temperature DLM output using tidybayes.

# BART

chain_TempDLM_BART <- nimble_burn_TempDLM_BART %>%
  spread_draws(y[day], x[day], b1, sd_add)

# KONZ

chain_TempDLM_KONZ <- nimble_burn_TempDLM_KONZ %>%
  spread_draws(y[day], x[day], b1, sd_add)

# OSBS

chain_TempDLM_OSBS <- nimble_burn_TempDLM_OSBS %>%
  spread_draws(y[day], x[day], b1, sd_add)

# SRER

chain_TempDLM_SRER <- nimble_burn_TempDLM_SRER %>%
  spread_draws(y[day], x[day], b1, sd_add)

Plot temperature DLM versus observations.

plot_colors <- c("#F8766D", "#7CAE00", "#00BFC4", "#C77CFF")

# BART

plot_chain_TempDLM_BART <- chain_TempDLM_BART %>%
  group_by(day) %>%
  summarise(mean = mean(x),
            lower = quantile(x, 0.025),
            upper = quantile(x, 0.975),
            .groups = "drop") %>%
  mutate(time = target_data_BART_sub$time)

plot_TempDLM_BART <- ggplot(data = plot_chain_TempDLM_BART, aes(x = time, y = mean)) +
  geom_line() +
  geom_ribbon(aes(ymin = lower, ymax = upper),
              alpha = 0.2,
              color = plot_colors[1],
              fill = plot_colors[1]) +
  geom_point(data = target_data_BART_sub, aes(x = time, y = nee), color = "black") +
  labs(title = paste("Site: BART | Time Period: ", substr(target_data_BART_sub$time[1], 1, 10), "to", substr(target_data_BART_sub$time[subset_length_timesteps], 1, 10)),
       x = "Time (UTC)",
       y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
  theme_bw()

# KONZ

plot_chain_TempDLM_KONZ <- chain_TempDLM_KONZ %>%
  group_by(day) %>%
  summarise(mean = mean(x),
            lower = quantile(x, 0.025),
            upper = quantile(x, 0.975),
            .groups = "drop") %>%
  mutate(time = target_data_KONZ_sub$time)

plot_TempDLM_KONZ <- ggplot(data = plot_chain_TempDLM_KONZ, aes(x = time, y = mean)) +
  geom_line() +
  geom_ribbon(aes(ymin = lower, ymax = upper),
              alpha = 0.2,
              color = plot_colors[2],
              fill = plot_colors[2]) +
  geom_point(data = target_data_KONZ_sub, aes(x = time, y = nee), color = "black") +
  labs(title = paste("Site: KONZ | Time Period: ", substr(target_data_KONZ_sub$time[1], 1, 10), "to", substr(target_data_KONZ_sub$time[subset_length_timesteps], 1, 10)),
       x = "Time (UTC)",
       y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
  theme_bw()

# OSBS

plot_chain_TempDLM_OSBS <- chain_TempDLM_OSBS %>%
  group_by(day) %>%
  summarise(mean = mean(x),
            lower = quantile(x, 0.025),
            upper = quantile(x, 0.975),
            .groups = "drop") %>%
  mutate(time = target_data_OSBS_sub$time)

plot_TempDLM_OSBS <- ggplot(data = plot_chain_TempDLM_OSBS, aes(x = time, y = mean)) +
  geom_line() +
  geom_ribbon(aes(ymin = lower, ymax = upper),
              alpha = 0.2,
              color = plot_colors[3],
              fill = plot_colors[3]) +
  geom_point(data = target_data_OSBS_sub, aes(x = time, y = nee), color = "black") +
  labs(title = paste("Site: OSBS | Time Period: ", substr(target_data_OSBS_sub$time[1], 1, 10), "to", substr(target_data_OSBS_sub$time[subset_length_timesteps], 1, 10)),
       x = "Time (UTC)",
       y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
  theme_bw()

# SRER

plot_chain_TempDLM_SRER <- chain_TempDLM_SRER %>%
  group_by(day) %>%
  summarise(mean = mean(x),
            lower = quantile(x, 0.025),
            upper = quantile(x, 0.975),
            .groups = "drop") %>%
  mutate(time = target_data_SRER_sub$time)

plot_TempDLM_SRER <- ggplot(data = plot_chain_TempDLM_SRER, aes(x = time, y = mean)) +
  geom_line() +
  geom_ribbon(aes(ymin = lower, ymax = upper),
              alpha = 0.2,
              color = plot_colors[4],
              fill = plot_colors[4]) +
  geom_point(data = target_data_SRER_sub, aes(x = time, y = nee), color = "black") +
  labs(title = paste("Site: SRER | Time Period: ", substr(target_data_SRER_sub$time[1], 1, 10), "to", substr(target_data_SRER_sub$time[subset_length_timesteps], 1, 10)),
       x = "Time (UTC)",
       y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
  theme_bw()

plot_TempDLM_BART

plot_TempDLM_KONZ

plot_TempDLM_OSBS

plot_TempDLM_SRER

Generate forecasts.

# Forecast preliminaries

ensemble_size <- 1000  # Specify ensemble size

forecast_length <- 35 * 24 * 2 + 1 # Number of time steps in forecast

NOAA_temp_forecast <- read.csv(text = getURL("https://raw.githubusercontent.com/eco4cast-class-VT/VT_NEET/master/temp_fc30.csv"))  # Read in NOAA GEFS forecasts

Generate forecast for BART.

# Sample from temperature forecasts

NOAA_temp_forecast_site <- filter(NOAA_temp_forecast, siteID == "BART")  # Select NOAA forecasts for BART

temp_ensembles <- sample(unique(NOAA_temp_forecast_site$ensemble),
                         size = ensemble_size,
                         replace = TRUE)

forecast_temp <- matrix(NA, nrow = forecast_length, ncol = ensemble_size)  # Initialize forecast temperature matrix

for(i in 1:ensemble_size){
  
  forecast_temp[, i] <- filter(NOAA_temp_forecast_site, ensemble == temp_ensembles[i])$tempC
  
}

# Sample from parameter posterior distribution

chain_TempDLM_last_time_step <- filter(chain_TempDLM_BART,  # Create tibble for MCMC results for last time step
                                       day == max(chain_TempDLM_BART$day))

posterior_sample_indices <- sample(x = 1:length(chain_TempDLM_last_time_step$x),
                                   size = ensemble_size,
                                   replace = TRUE)

NEE_ic <- chain_TempDLM_last_time_step$x[posterior_sample_indices]  # Extract last latent state value
b1 <- chain_TempDLM_last_time_step$b1[posterior_sample_indices]  # Extract sampled b1 values
sd_add <- chain_TempDLM_last_time_step$sd_add[posterior_sample_indices]  # Extract sampled sd_add values

# Specify initial conditions

NEE_state <- matrix(NA, nrow = forecast_length, ncol = ensemble_size)  # Initialize state variable matrix
NEE_obs <- matrix(NA, nrow = forecast_length, ncol = ensemble_size)  # Initialize observed NEE matrix

NEE_state[1, ] <- NEE_ic  # Specify initial condition for state variable

# Generate forecast

for(m in 1:ensemble_size){
  
  for(t in 1:forecast_length){

    if(t > 1){  # Condition set b/c initial condition already specified for state variable
      
      # Note:
      # NEE_state = real NEE (state variable)
      # NEE_obs = observed NEE
      
      pred <- NEE_state[t - 1, m] + b1[m] * forecast_temp[t, m]
      NEE_state[t, m] <- rnorm(1, mean = pred, sd = sd_add[m])
    
    }
    
    if(NEE_state[t, m] >= 0){  # Calculate sd_obs if NEE is positive
      
      sd_obs <- target_data_BART_sub$nee_sd_intercept[1] + target_data_BART_sub$nee_sd_slopeP[1] * NEE_state[t, m]  # Note: Since intercept and slope do not change, can just use first value of intercept and slope values
      
    }else{  # Calculate sd_obs if NEE is negative
      
      sd_obs <- target_data_BART_sub$nee_sd_intercept[1] + target_data_BART_sub$nee_sd_slopeN[1] * NEE_state[t, m]
          
    }
      
    NEE_obs[t, m] <- rnorm(1, mean = NEE_state[t, m], sd = sd_obs * sd_obs_mod_BART)
    
  }
  
}

nee <- as.vector(t(NEE_obs))  # Convert matrix to vector for long format (transposed so that vector is organized so that each ensemble member is nested within each time step)

time <- rep(NA, length(nee))  # Initialize time vector

NOAA_forecast_times <- unique(NOAA_temp_forecast_site$time)  # Extract forecast times

# Assign forecast times

for(i in 1:forecast_length){
  
  time[((i*ensemble_size) - (ensemble_size - 1)):(i*ensemble_size)] <- NOAA_forecast_times[i]
  
}

siteID <- rep("BART", length.out = length(nee))  # Create siteID vector

ensemble <- rep(seq(from = 1, to = ensemble_size, by = 1), times = forecast_length)  # Create ensemble vector

forecast <- rep(1, length.out = length(nee))  # Create forecast vector

data_assimilation <- rep(0, length.out = length(nee))  # Create forecast vector

le <- rep(NA, length.out = length(nee))  # Create le vector

vswc <- rep(NA, length.out = length(nee))  # Create vswc vector

NEE_forecast_BART <- data.frame(time, siteID, ensemble, forecast, data_assimilation, nee, le, vswc)  # Create data frame with forecast data

NEE_forecast_BART$time <- ymd_hms(NEE_forecast_BART$time, tz = "UTC")  # Specify time column as POSIXct

Generate forecast for KONZ.

# Sample from temperature forecasts

NOAA_temp_forecast_site <- filter(NOAA_temp_forecast, siteID == "KONZ")  # Select NOAA forecasts for KONZ

temp_ensembles <- sample(unique(NOAA_temp_forecast_site$ensemble),
                         size = ensemble_size,
                         replace = TRUE)

forecast_temp <- matrix(NA, nrow = forecast_length, ncol = ensemble_size)  # Initialize forecast temperature matrix

for(i in 1:ensemble_size){
  
  forecast_temp[, i] <- filter(NOAA_temp_forecast_site, ensemble == temp_ensembles[i])$tempC
  
}

# Sample from parameter posterior distribution

chain_TempDLM_last_time_step <- filter(chain_TempDLM_KONZ,  # Create tibble for MCMC results for last time step
                                       day == max(chain_TempDLM_KONZ$day))

posterior_sample_indices <- sample(x = 1:length(chain_TempDLM_last_time_step$x),
                                   size = ensemble_size,
                                   replace = TRUE)

NEE_ic <- chain_TempDLM_last_time_step$x[posterior_sample_indices]  # Extract last latent state value
b1 <- chain_TempDLM_last_time_step$b1[posterior_sample_indices]  # Extract sampled b1 values
sd_add <- chain_TempDLM_last_time_step$sd_add[posterior_sample_indices]  # Extract sampled sd_add values

# Specify initial conditions

NEE_state <- matrix(NA, nrow = forecast_length, ncol = ensemble_size)  # Initialize state variable matrix
NEE_obs <- matrix(NA, nrow = forecast_length, ncol = ensemble_size)  # Initialize observed NEE matrix

NEE_state[1, ] <- NEE_ic  # Specify initial condition for state variable

# Generate forecast

for(m in 1:ensemble_size){
  
  for(t in 1:forecast_length){

    if(t > 1){  # Condition set b/c initial condition already specified for state variable
      
      # Note:
      # NEE_state = real NEE (state variable)
      # NEE_obs = observed NEE
      
      pred <- NEE_state[t - 1, m] + b1[m] * forecast_temp[t, m]
      NEE_state[t, m] <- rnorm(1, mean = pred, sd = sd_add[m])
    
    }
    
    if(NEE_state[t, m] >= 0){  # Calculate sd_obs if NEE is positive
      
      sd_obs <- target_data_KONZ_sub$nee_sd_intercept[1] + target_data_KONZ_sub$nee_sd_slopeP[1] * NEE_state[t, m]  # Note: Since intercept and slope do not change, can just use first value of intercept and slope values
      
    }else{  # Calculate sd_obs if NEE is negative
      
      sd_obs <- target_data_KONZ_sub$nee_sd_intercept[1] + target_data_KONZ_sub$nee_sd_slopeN[1] * NEE_state[t, m]
          
    }
      
    NEE_obs[t, m] <- rnorm(1, mean = NEE_state[t, m], sd = sd_obs * sd_obs_mod_KONZ)
    
  }
  
}

nee <- as.vector(t(NEE_obs))  # Convert matrix to vector for long format (transposed so that vector is organized so that each ensemble member is nested within each time step)

time <- rep(NA, length(nee))  # Initialize time vector

NOAA_forecast_times <- unique(NOAA_temp_forecast_site$time)  # Extract forecast times

# Assign forecast times

for(i in 1:forecast_length){
  
  time[((i*ensemble_size) - (ensemble_size - 1)):(i*ensemble_size)] <- NOAA_forecast_times[i]
  
}

siteID <- rep("KONZ", length.out = length(nee))  # Create siteID vector

ensemble <- rep(seq(from = 1, to = ensemble_size, by = 1), times = forecast_length)  # Create ensemble vector

forecast <- rep(1, length.out = length(nee))  # Create forecast vector

data_assimilation <- rep(0, length.out = length(nee))  # Create forecast vector

le <- rep(NA, length.out = length(nee))  # Create le vector

vswc <- rep(NA, length.out = length(nee))  # Create vswc vector

NEE_forecast_KONZ <- data.frame(time, siteID, ensemble, forecast, data_assimilation, nee, le, vswc)  # Create data frame with forecast data

NEE_forecast_KONZ$time <- ymd_hms(NEE_forecast_KONZ$time, tz = "UTC")  # Specify time column as POSIXct

Generate forecast for OSBS.

# Sample from temperature forecasts

NOAA_temp_forecast_site <- filter(NOAA_temp_forecast, siteID == "OSBS")  # Select NOAA forecasts for OSBS

temp_ensembles <- sample(unique(NOAA_temp_forecast_site$ensemble),
                         size = ensemble_size,
                         replace = TRUE)

forecast_temp <- matrix(NA, nrow = forecast_length, ncol = ensemble_size)  # Initialize forecast temperature matrix

for(i in 1:ensemble_size){
  
  forecast_temp[, i] <- filter(NOAA_temp_forecast_site, ensemble == temp_ensembles[i])$tempC
  
}

# Sample from parameter posterior distribution

chain_TempDLM_last_time_step <- filter(chain_TempDLM_OSBS,  # Create tibble for MCMC results for last time step
                                       day == max(chain_TempDLM_OSBS$day))

posterior_sample_indices <- sample(x = 1:length(chain_TempDLM_last_time_step$x),
                                   size = ensemble_size,
                                   replace = TRUE)

NEE_ic <- chain_TempDLM_last_time_step$x[posterior_sample_indices]  # Extract last latent state value
b1 <- chain_TempDLM_last_time_step$b1[posterior_sample_indices]  # Extract sampled b1 values
sd_add <- chain_TempDLM_last_time_step$sd_add[posterior_sample_indices]  # Extract sampled sd_add values

# Specify initial conditions

NEE_state <- matrix(NA, nrow = forecast_length, ncol = ensemble_size)  # Initialize state variable matrix
NEE_obs <- matrix(NA, nrow = forecast_length, ncol = ensemble_size)  # Initialize observed NEE matrix

NEE_state[1, ] <- NEE_ic  # Specify initial condition for state variable

# Generate forecast

for(m in 1:ensemble_size){
  
  for(t in 1:forecast_length){

    if(t > 1){  # Condition set b/c initial condition already specified for state variable
      
      # Note:
      # NEE_state = real NEE (state variable)
      # NEE_obs = observed NEE
      
      pred <- NEE_state[t - 1, m] + b1[m] * forecast_temp[t, m]
      NEE_state[t, m] <- rnorm(1, mean = pred, sd = sd_add[m])
    
    }
    
    if(NEE_state[t, m] >= 0){  # Calculate sd_obs if NEE is positive
      
      sd_obs <- target_data_OSBS_sub$nee_sd_intercept[1] + target_data_OSBS_sub$nee_sd_slopeP[1] * NEE_state[t, m]  # Note: Since intercept and slope do not change, can just use first value of intercept and slope values
      
    }else{  # Calculate sd_obs if NEE is negative
      
      sd_obs <- target_data_OSBS_sub$nee_sd_intercept[1] + target_data_OSBS_sub$nee_sd_slopeN[1] * NEE_state[t, m]
          
    }
      
    NEE_obs[t, m] <- rnorm(1, mean = NEE_state[t, m], sd = sd_obs * sd_obs_mod_OSBS)
    
  }
  
}

nee <- as.vector(t(NEE_obs))  # Convert matrix to vector for long format (transposed so that vector is organized so that each ensemble member is nested within each time step)

time <- rep(NA, length(nee))  # Initialize time vector

NOAA_forecast_times <- unique(NOAA_temp_forecast_site$time)  # Extract forecast times

# Assign forecast times

for(i in 1:forecast_length){
  
  time[((i*ensemble_size) - (ensemble_size - 1)):(i*ensemble_size)] <- NOAA_forecast_times[i]
  
}

siteID <- rep("OSBS", length.out = length(nee))  # Create siteID vector

ensemble <- rep(seq(from = 1, to = ensemble_size, by = 1), times = forecast_length)  # Create ensemble vector

forecast <- rep(1, length.out = length(nee))  # Create forecast vector

data_assimilation <- rep(0, length.out = length(nee))  # Create forecast vector

le <- rep(NA, length.out = length(nee))  # Create le vector

vswc <- rep(NA, length.out = length(nee))  # Create vswc vector

NEE_forecast_OSBS <- data.frame(time, siteID, ensemble, forecast, data_assimilation, nee, le, vswc)  # Create data frame with forecast data

NEE_forecast_OSBS$time <- ymd_hms(NEE_forecast_OSBS$time, tz = "UTC")  # Specify time column as POSIXct

Generate forecast for SRER.

# Sample from temperature forecasts

NOAA_temp_forecast_site <- filter(NOAA_temp_forecast, siteID == "SRER")  # Select NOAA forecasts for SRER

temp_ensembles <- sample(unique(NOAA_temp_forecast_site$ensemble),
                         size = ensemble_size,
                         replace = TRUE)

forecast_temp <- matrix(NA, nrow = forecast_length, ncol = ensemble_size)  # Initialize forecast temperature matrix

for(i in 1:ensemble_size){
  
  forecast_temp[, i] <- filter(NOAA_temp_forecast_site, ensemble == temp_ensembles[i])$tempC
  
}

# Sample from parameter posterior distribution

chain_TempDLM_last_time_step <- filter(chain_TempDLM_SRER,  # Create tibble for MCMC results for last time step
                                       day == max(chain_TempDLM_SRER$day))

posterior_sample_indices <- sample(x = 1:length(chain_TempDLM_last_time_step$x),
                                   size = ensemble_size,
                                   replace = TRUE)

NEE_ic <- chain_TempDLM_last_time_step$x[posterior_sample_indices]  # Extract last latent state value
b1 <- chain_TempDLM_last_time_step$b1[posterior_sample_indices]  # Extract sampled b1 values
sd_add <- chain_TempDLM_last_time_step$sd_add[posterior_sample_indices]  # Extract sampled sd_add values

# Specify initial conditions

NEE_state <- matrix(NA, nrow = forecast_length, ncol = ensemble_size)  # Initialize state variable matrix
NEE_obs <- matrix(NA, nrow = forecast_length, ncol = ensemble_size)  # Initialize observed NEE matrix

NEE_state[1, ] <- NEE_ic  # Specify initial condition for state variable

# Generate forecast

for(m in 1:ensemble_size){
  
  for(t in 1:forecast_length){

    if(t > 1){  # Condition set b/c initial condition already specified for state variable
      
      # Note:
      # NEE_state = real NEE (state variable)
      # NEE_obs = observed NEE
      
      pred <- NEE_state[t - 1, m] + b1[m] * forecast_temp[t, m]
      NEE_state[t, m] <- rnorm(1, mean = pred, sd = sd_add[m])
    
    }
    
    if(NEE_state[t, m] >= 0){  # Calculate sd_obs if NEE is positive
      
      sd_obs <- target_data_SRER_sub$nee_sd_intercept[1] + target_data_SRER_sub$nee_sd_slopeP[1] * NEE_state[t, m]  # Note: Since intercept and slope do not change, can just use first value of intercept and slope values
      
    }else{  # Calculate sd_obs if NEE is negative
      
      sd_obs <- target_data_SRER_sub$nee_sd_intercept[1] + target_data_SRER_sub$nee_sd_slopeN[1] * NEE_state[t, m]
          
    }
      
    NEE_obs[t, m] <- rnorm(1, mean = NEE_state[t, m], sd = sd_obs * sd_obs_mod_SRER)
    
  }
  
}

nee <- as.vector(t(NEE_obs))  # Convert matrix to vector for long format (transposed so that vector is organized so that each ensemble member is nested within each time step)

time <- rep(NA, length(nee))  # Initialize time vector

NOAA_forecast_times <- unique(NOAA_temp_forecast_site$time)  # Extract forecast times

# Assign forecast times

for(i in 1:forecast_length){
  
  time[((i*ensemble_size) - (ensemble_size - 1)):(i*ensemble_size)] <- NOAA_forecast_times[i]
  
}

siteID <- rep("SRER", length.out = length(nee))  # Create siteID vector

ensemble <- rep(seq(from = 1, to = ensemble_size, by = 1), times = forecast_length)  # Create ensemble vector

forecast <- rep(1, length.out = length(nee))  # Create forecast vector

data_assimilation <- rep(0, length.out = length(nee))  # Create forecast vector

le <- rep(NA, length.out = length(nee))  # Create le vector

vswc <- rep(NA, length.out = length(nee))  # Create vswc vector

NEE_forecast_SRER <- data.frame(time, siteID, ensemble, forecast, data_assimilation, nee, le, vswc)  # Create data frame with forecast data

NEE_forecast_SRER$time <- ymd_hms(NEE_forecast_SRER$time, tz = "UTC")  # Specify time column as POSIXct

Compile forecasts and write output to .csv file.

NEE_forecast <- rbind(NEE_forecast_BART,
                      NEE_forecast_KONZ,
                      NEE_forecast_OSBS,
                      NEE_forecast_SRER)

write.csv(NEE_forecast, file = "terrestrial_30min-2021-03-01-VT_NEET.csv")

Plot forecasts (with fitted model and observations).

# BART

plot_NEE_forecast_BART <- NEE_forecast_BART %>%
  group_by(time) %>%
  summarise(mean = mean(nee),
            lower = quantile(nee, 0.025),
            upper = quantile(nee, 0.975),
            .groups = "drop")

plot_TempDLM_BART <- ggplot() +
  geom_line(data = plot_chain_TempDLM_BART, aes(x = time, y = mean, color = "1")) +
  geom_line(data = plot_NEE_forecast_BART, aes(x = time, y = mean, color = "2")) +
  geom_ribbon(data = plot_chain_TempDLM_BART, aes(x = time, ymin = lower, ymax = upper, color = "1", fill = "1"),
              alpha = 0.2) +
  geom_ribbon(data = plot_NEE_forecast_BART, aes(x = time, ymin = lower, ymax = upper, color = "2", fill = "2"),
              alpha = 0.2) +
  geom_point(data = target_data_BART_sub, aes(x = time, y = nee), color = "black") +
  scale_fill_manual(name = "Period", values = c("gray", plot_colors[1]), labels = c("Historical", "Forecast")) +
  scale_color_manual(name = "Period", values = c("gray", plot_colors[1]), labels = c("Historical", "Forecast")) +
  labs(title = paste("Site: BART | Time Period: ", substr(plot_chain_TempDLM_BART$time[1], 1, 10), "to", substr(plot_NEE_forecast_BART$time[length(plot_NEE_forecast_BART$time)], 1, 10)),
       x = "Time (UTC)",
       y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
  theme_bw()

# KONZ

plot_NEE_forecast_KONZ <- NEE_forecast_KONZ %>%
  group_by(time) %>%
  summarise(mean = mean(nee),
            lower = quantile(nee, 0.025),
            upper = quantile(nee, 0.975),
            .groups = "drop")

plot_TempDLM_KONZ <- ggplot() +
  geom_line(data = plot_chain_TempDLM_KONZ, aes(x = time, y = mean, color = "1")) +
  geom_line(data = plot_NEE_forecast_KONZ, aes(x = time, y = mean, color = "2")) +
  geom_ribbon(data = plot_chain_TempDLM_KONZ, aes(x = time, ymin = lower, ymax = upper, color = "1", fill = "1"),
              alpha = 0.2) +
  geom_ribbon(data = plot_NEE_forecast_KONZ, aes(x = time, ymin = lower, ymax = upper, color = "2", fill = "2"),
              alpha = 0.2) +
  geom_point(data = target_data_KONZ_sub, aes(x = time, y = nee), color = "black") +
  scale_fill_manual(name = "Period", values = c("gray", plot_colors[2]), labels = c("Historical", "Forecast")) +
  scale_color_manual(name = "Period", values = c("gray", plot_colors[2]), labels = c("Historical", "Forecast")) +
  labs(title = paste("Site: KONZ | Time Period: ", substr(plot_chain_TempDLM_KONZ$time[1], 1, 10), "to", substr(plot_NEE_forecast_KONZ$time[length(plot_NEE_forecast_KONZ$time)], 1, 10)),
       x = "Time (UTC)",
       y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
  theme_bw()

# OSBS

plot_NEE_forecast_OSBS <- NEE_forecast_OSBS %>%
  group_by(time) %>%
  summarise(mean = mean(nee),
            lower = quantile(nee, 0.025),
            upper = quantile(nee, 0.975),
            .groups = "drop")

plot_TempDLM_OSBS <- ggplot() +
  geom_line(data = plot_chain_TempDLM_OSBS, aes(x = time, y = mean, color = "1")) +
  geom_line(data = plot_NEE_forecast_OSBS, aes(x = time, y = mean, color = "2")) +
  geom_ribbon(data = plot_chain_TempDLM_OSBS, aes(x = time, ymin = lower, ymax = upper, color = "1", fill = "1"),
              alpha = 0.2) +
  geom_ribbon(data = plot_NEE_forecast_OSBS, aes(x = time, ymin = lower, ymax = upper, color = "2", fill = "2"),
              alpha = 0.2) +
  geom_point(data = target_data_OSBS_sub, aes(x = time, y = nee), color = "black") +
  scale_fill_manual(name = "Period", values = c("gray", plot_colors[3]), labels = c("Historical", "Forecast")) +
  scale_color_manual(name = "Period", values = c("gray", plot_colors[3]), labels = c("Historical", "Forecast")) +
  labs(title = paste("Site: OSBS | Time Period: ", substr(plot_chain_TempDLM_OSBS$time[1], 1, 10), "to", substr(plot_NEE_forecast_OSBS$time[length(plot_NEE_forecast_OSBS$time)], 1, 10)),
       x = "Time (UTC)",
       y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
  theme_bw()

# SRER

plot_NEE_forecast_SRER <- NEE_forecast_SRER %>%
  group_by(time) %>%
  summarise(mean = mean(nee),
            lower = quantile(nee, 0.025),
            upper = quantile(nee, 0.975),
            .groups = "drop")

plot_TempDLM_SRER <- ggplot() +
  geom_line(data = plot_chain_TempDLM_SRER, aes(x = time, y = mean, color = "1")) +
  geom_line(data = plot_NEE_forecast_SRER, aes(x = time, y = mean, color = "2")) +
  geom_ribbon(data = plot_chain_TempDLM_SRER, aes(x = time, ymin = lower, ymax = upper, color = "1", fill = "1"),
              alpha = 0.2) +
  geom_ribbon(data = plot_NEE_forecast_SRER, aes(x = time, ymin = lower, ymax = upper, color = "2", fill = "2"),
              alpha = 0.2) +
  geom_point(data = target_data_SRER_sub, aes(x = time, y = nee), color = "black") +
  scale_fill_manual(name = "Period", values = c("gray", plot_colors[4]), labels = c("Historical", "Forecast")) +
  scale_color_manual(name = "Period", values = c("gray", plot_colors[4]), labels = c("Historical", "Forecast")) +
  labs(title = paste("Site: SRER | Time Period: ", substr(plot_chain_TempDLM_SRER$time[1], 1, 10), "to", substr(plot_NEE_forecast_SRER$time[length(plot_NEE_forecast_SRER$time)], 1, 10)),
       x = "Time (UTC)",
       y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
  theme_bw()

plot_TempDLM_BART
## Warning: Removed 606 rows containing missing values (geom_point).

plot_TempDLM_KONZ
## Warning: Removed 794 rows containing missing values (geom_point).

plot_TempDLM_OSBS
## Warning: Removed 583 rows containing missing values (geom_point).

plot_TempDLM_SRER
## Warning: Removed 889 rows containing missing values (geom_point).